//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-31968024
// Cuda compilation tools, release 12.0, V12.0.76
// Based on NVVM 7.0.1
//

// PTX ISA version, target architecture and pointer width for this module.
.version 8.0
.target sm_52
.address_size 64

// 384-byte, 16-byte-aligned constant buffer shared by the code in this module.
// Byte offsets that the visible code reads (each u64 below is passed through
// cvta.to.global before being dereferenced):
//   +0    u64  base of a table of 32-byte records
//   +8    u64  base of a table of 12-byte entries (three s32 indices each)
//   +16   u32  threshold compared against / subtracted from the caller's index
//   +24   u64  base of a table of 8-byte f32 pairs
//   +32   u64  base of a table of u32 selector words
//   +40   u64  base of a table of 16-byte entries {u32, u32, u64 address}
//   +48   u64  base of a table of 64-bit values used as tex.2d operands
//   +56   u64  base of a table of u32 words tested for the early-out
//   +224  u64  base of a table of 16-byte entries (f32 read at byte 8)
//   +340  u32  flag; non-zero disables the [params+224] lookup
// NOTE(review): the remaining bytes are not referenced here; the full layout
// presumably mirrors a host-side struct -- confirm against the launch code.
.const .align 16 .b8 params[384];

// __direct_callable__oxMain
//
// Device function returning a 32-byte aggregate built from tables whose
// addresses are read out of the `params` constant buffer.
//
// Parameters
//   param_0 : 32-bit index. It selects an entry of the u32 table at
//             [params+56] and a 12-byte triple of s32 indices (i0, i1, i2)
//             in the table at [params+8].
//   param_1 : two f32 weights, w1 at byte 0 and w2 at byte 4. Per-index
//             values are combined as
//                 (1 - w1 - w2)*v[i0] + w1*v[i1] + w2*v[i2].
//             NOTE(review): this has the shape of barycentric interpolation
//             over a triangle -- confirm against the originating source.
//
// Return value (func_retval0)
//   +0, +4, +8 : combined 3-vector taken from record bytes 12..23, scaled by
//                1/sqrt of its squared length
//   +12..+15   : %rs17..%rs20, which no instruction in this function writes,
//                so these bytes are unspecified
//   +16, +20   : pair derived from record bytes 24..31 ($L__BB0_10 / _11)
//   +24        : scalar %f134 (0.0, 1.0, or the converted packed texel)
//   +28..+31   : %rs21..%rs24, likewise never written
//   The early-out returns 0.0 in all six float fields; the $L__BB0_4 path
//   returns 0.0 in every float field except +24.
.visible .func  (.param .align 8 .b8 func_retval0[32]) __direct_callable__oxMain(
	.param .b32 __direct_callable__oxMain_param_0,
	.param .align 8 .b8 __direct_callable__oxMain_param_1[8]
)
{
	.reg .pred 	%p<13>;
	.reg .b16 	%rs<57>;
	.reg .f32 	%f<144>;
	.reg .b32 	%r<34>;
	.reg .b64 	%rd<52>;


	// %r3 = param_0; %f36 = w1 (param_1 bytes 0..3); %f37 = w2 (bytes 4..7).
	ld.param.u32 	%r3, [__direct_callable__oxMain_param_0];
	ld.param.f32 	%f37, [__direct_callable__oxMain_param_1+4];
	ld.param.f32 	%f36, [__direct_callable__oxMain_param_1];
	// %r4 = u32 entry param_0 of the table at [params+56]
	// (the index is scaled as a signed 32-bit value here).
	ld.const.u64 	%rd8, [params+56];
	cvta.to.global.u64 	%rd9, %rd8;
	mul.wide.s32 	%rd10, %r3, 4;
	add.s64 	%rd11, %rd9, %rd10;
	ld.global.u32 	%r4, [%rd11];
	// %p4 = (%r4 != 0) && (bit 0 of %r4 is clear).
	setp.ne.s32 	%p1, %r4, 0;
	and.b32  	%r5, %r4, 1;
	setp.eq.b32 	%p2, %r5, 1;
	not.pred 	%p3, %p2;
	// Default all six float outputs to 0.0 before the early-out test.
	mov.f32 	%f138, 0f00000000;
	and.pred  	%p4, %p1, %p3;
	mov.f32 	%f139, %f138;
	mov.f32 	%f140, %f138;
	mov.f32 	%f141, %f138;
	mov.f32 	%f142, %f138;
	mov.f32 	%f134, %f138;
	// Early-out: return the all-zero defaults.
	@%p4 bra 	$L__BB0_13;

	// Load i0/i1/i2 (s32, sign-extended into %rd1/%rd2/%rd3) from the 12-byte
	// entry param_0 of the table at [params+8].
	// NOTE(review): the index is scaled as unsigned here but as signed for the
	// [params+56] lookup above; the two differ only for negative param_0.
	ld.const.u64 	%rd12, [params+8];
	cvta.to.global.u64 	%rd13, %rd12;
	mul.wide.u32 	%rd14, %r3, 12;
	add.s64 	%rd15, %rd13, %rd14;
	ld.global.s32 	%rd1, [%rd15];
	ld.global.s32 	%rd2, [%rd15+4];
	ld.global.s32 	%rd3, [%rd15+8];
	// %r1 = u32 at [params+16]. When %r1 > param_0 (unsigned compare) the
	// selector lookup is skipped via $L__BB0_5.
	ld.const.u32 	%r1, [params+16];
	setp.gt.u32 	%p5, %r1, %r3;
	@%p5 bra 	$L__BB0_5;
	bra.uni 	$L__BB0_2;

$L__BB0_5:
	// No selector lookup for this index: %f134 = 1.0.
	mov.f32 	%f134, 0f3F800000;
	bra.uni 	$L__BB0_6;

$L__BB0_2:
	// %rd19 -> u32 entry (param_0 - %r1) of the table at [params+32].
	sub.s32 	%r6, %r3, %r1;
	ld.const.u64 	%rd16, [params+32];
	cvta.to.global.u64 	%rd17, %rd16;
	mul.wide.u32 	%rd18, %r6, 4;
	add.s64 	%rd19, %rd17, %rd18;
	// %rd21 = base of the f32-pair table at [params+24] (8 bytes per entry);
	// load the pairs for i0, i1 and i2.
	ld.const.u64 	%rd20, [params+24];
	cvta.to.global.u64 	%rd21, %rd20;
	shl.b64 	%rd22, %rd1, 3;
	add.s64 	%rd23, %rd21, %rd22;
	ld.global.v2.f32 	{%f44, %f45}, [%rd23];
	shl.b64 	%rd24, %rd2, 3;
	add.s64 	%rd25, %rd21, %rd24;
	ld.global.v2.f32 	{%f48, %f49}, [%rd25];
	shl.b64 	%rd26, %rd3, 3;
	add.s64 	%rd27, %rd21, %rd26;
	ld.global.v2.f32 	{%f52, %f53}, [%rd27];
	// (%f3, %f4) = (1 - w1 - w2)*pair[i0] + w1*pair[i1] + w2*pair[i2].
	mov.f32 	%f56, 0f3F800000;
	sub.f32 	%f57, %f56, %f36;
	sub.f32 	%f58, %f57, %f37;
	mul.f32 	%f59, %f36, %f48;
	mul.f32 	%f60, %f36, %f49;
	fma.rn.f32 	%f61, %f58, %f44, %f59;
	fma.rn.f32 	%f62, %f58, %f45, %f60;
	fma.rn.f32 	%f3, %f37, %f52, %f61;
	fma.rn.f32 	%f4, %f37, %f53, %f62;
	// %r2 = selector word. -16384 is the mask 0xFFFFC000, so the test below is
	// true exactly when %r2 lies in 16384..32767; that range takes the
	// texture path ($L__BB0_4), everything else the byte-mask path ($L__BB0_3).
	ld.global.u32 	%r2, [%rd19];
	and.b32  	%r7, %r2, -16384;
	setp.eq.s32 	%p6, %r7, 16384;
	@%p6 bra 	$L__BB0_4;
	bra.uni 	$L__BB0_3;

$L__BB0_4:
	// Texture path: (%r2 - 16384) indexes the table of 64-bit values at
	// [params+48]; the loaded value is the texture operand of tex.2d, sampled
	// at (%f3, %f4).
	add.s32 	%r19, %r2, -16384;
	ld.const.u64 	%rd36, [params+48];
	cvta.to.global.u64 	%rd37, %rd36;
	mul.wide.u32 	%rd38, %r19, 8;
	add.s64 	%rd39, %rd37, %rd38;
	ld.global.u64 	%rd40, [%rd39];
	tex.2d.v4.f32.f32 	{%f73, %f74, %f75, %f76}, [%rd40, {%f3, %f4}];
	// Channels 0..2 are each saturated, multiplied by 255.0 (0f437F0000) and
	// truncated toward zero to s32. Channel 3 (%f76) is not used.
	cvt.sat.f32.f32 	%f77, %f73;
	mul.f32 	%f78, %f77, 0f437F0000;
	cvt.rzi.s32.f32 	%r20, %f78;
	cvt.sat.f32.f32 	%f79, %f74;
	mul.f32 	%f80, %f79, 0f437F0000;
	cvt.rzi.s32.f32 	%r21, %f80;
	cvt.sat.f32.f32 	%f81, %f75;
	mul.f32 	%f82, %f81, 0f437F0000;
	cvt.rzi.s32.f32 	%r22, %f82;
	// Pack as c0 | c1<<8 | c2<<16, invert every bit, then convert the s32
	// result to f32 as the value returned at +24.
	shl.b32 	%r23, %r21, 8;
	or.b32  	%r24, %r23, %r20;
	shl.b32 	%r25, %r22, 16;
	or.b32  	%r26, %r24, %r25;
	not.b32 	%r27, %r26;
	cvt.rn.f32.s32 	%f134, %r27;
	// Straight to the return: the other five float outputs stay 0.0.
	bra.uni 	$L__BB0_13;

$L__BB0_3:
	// Byte-mask path. fu = |%f3| - floor(|%f3|), fv = |%f4| - floor(|%f4|).
	abs.f32 	%f63, %f3;
	cvt.rmi.f32.f32 	%f64, %f63;
	sub.f32 	%f65, %f63, %f64;
	abs.f32 	%f66, %f4;
	cvt.rmi.f32.f32 	%f67, %f66;
	sub.f32 	%f68, %f66, %f67;
	// %rd32 = [params+40] + ((%r2 << 4) & 0xFFFF0), i.e. 16-byte entry number
	// (%r2 & 0xFFFF). The entry is read as {u32 a, u32 b, u64 address}.
	// NOTE(review): a and b are used like a width and a height -- confirm.
	ld.const.u64 	%rd28, [params+40];
	cvta.to.global.u64 	%rd29, %rd28;
	shl.b32 	%r8, %r2, 4;
	cvt.u64.u32 	%rd30, %r8;
	and.b64  	%rd31, %rd30, 1048560;
	add.s64 	%rd32, %rd29, %rd31;
	ld.global.v2.u32 	{%r9, %r10}, [%rd32];
	// x = trunc(fu * a), y = trunc(fv * b); byte offset = a*y + x.
	cvt.rn.f32.u32 	%f69, %r9;
	mul.f32 	%f70, %f65, %f69;
	cvt.rzi.u32.f32 	%r13, %f70;
	cvt.rn.f32.u32 	%f71, %r10;
	mul.f32 	%f72, %f68, %f71;
	cvt.rzi.u32.f32 	%r14, %f72;
	mad.lo.s32 	%r15, %r9, %r14, %r13;
	cvt.u64.u32 	%rd33, %r15;
	// The entry's 64-bit address is used as-is with a generic-space ld.u8.
	ld.global.u64 	%rd34, [%rd32+8];
	add.s64 	%rd35, %rd34, %rd33;
	ld.u8 	%r16, [%rd35];
	// %f134 = ((%r2 >> 16) & byte) == 0 ? 0.0 : 1.0.
	shr.u32 	%r17, %r2, 16;
	and.b32  	%r18, %r17, %r16;
	setp.eq.s32 	%p7, %r18, 0;
	selp.f32 	%f134, 0f00000000, 0f3F800000, %p7;
	// Falls through into $L__BB0_6.

$L__BB0_6:
	// %rd4 = base of the 32-byte record table at [params];
	// %rd5 = &record[i0] + 24. Load the f32 pair (%f141, %f90) stored there.
	cvt.u32.u64 	%r28, %rd1;
	ld.const.u64 	%rd41, [params];
	cvta.to.global.u64 	%rd4, %rd41;
	mul.wide.s32 	%rd42, %r28, 32;
	add.s64 	%rd43, %rd4, %rd42;
	add.s64 	%rd5, %rd43, 24;
	ld.global.v2.f32 	{%f141, %f90}, [%rd43+24];
	// Skip the [params+224] lookup when %f141 >= 0 (geu is also true for NaN)
	// or when the u32 at [params+340] is non-zero.
	setp.geu.f32 	%p8, %f141, 0f00000000;
	ld.const.u32 	%r29, [params+340];
	setp.ne.s32 	%p9, %r29, 0;
	or.pred  	%p10, %p8, %p9;
	@%p10 bra 	$L__BB0_9;

	// %f141 < 0 here. index = -trunc(%f141 / 10.0) selects a 16-byte entry of
	// the table at [params+224]; read the f32 at byte 8 of that entry.
	div.rn.f32 	%f91, %f141, 0f41200000;
	cvt.rzi.s32.f32 	%r30, %f91;
	neg.s32 	%r31, %r30;
	ld.const.u64 	%rd44, [params+224];
	cvta.to.global.u64 	%rd45, %rd44;
	mul.wide.s32 	%rd46, %r31, 16;
	add.s64 	%rd47, %rd45, %rd46;
	ld.global.f32 	%f92, [%rd47+8];
	// Keep %f134 unless that value is negative.
	setp.geu.f32 	%p11, %f92, 0f00000000;
	@%p11 bra 	$L__BB0_9;

	// Negative entry value: force the +24 output to 0.0.
	mov.f32 	%f134, 0f00000000;

$L__BB0_9:
	// %rd6 = &record[i1] + 24, %rd7 = &record[i2] + 24.
	cvt.u32.u64 	%r32, %rd2;
	mul.wide.s32 	%rd48, %r32, 32;
	add.s64 	%rd49, %rd4, %rd48;
	add.s64 	%rd6, %rd49, 24;
	cvt.u32.u64 	%r33, %rd3;
	mul.wide.s32 	%rd50, %r33, 32;
	add.s64 	%rd51, %rd4, %rd50;
	add.s64 	%rd7, %rd51, 24;
	// A negative second component in record[i0]'s pair selects $L__BB0_11,
	// which skips the three-way blend of the pair.
	setp.lt.f32 	%p12, %f90, 0f00000000;
	@%p12 bra 	$L__BB0_11;
	bra.uni 	$L__BB0_10;

$L__BB0_11:
	// No blend: %f141 keeps record[i0]'s first value and %f142 = %f90 + 1.0.
	// %f135 = 1 - w1 - w2 is still needed by $L__BB0_12.
	add.f32 	%f142, %f90, 0f3F800000;
	mov.f32 	%f108, 0f3F800000;
	sub.f32 	%f109, %f108, %f36;
	sub.f32 	%f135, %f109, %f37;
	bra.uni 	$L__BB0_12;

$L__BB0_10:
	// (%f141, %f142) = (1 - w1 - w2)*pair[i0] + w1*pair[i1] + w2*pair[i2],
	// using the pairs at bytes 24..31 of the three records.
	ld.global.v2.f32 	{%f94, %f95}, [%rd6];
	ld.global.v2.f32 	{%f96, %f97}, [%rd7];
	mov.f32 	%f99, 0f3F800000;
	sub.f32 	%f100, %f99, %f36;
	sub.f32 	%f135, %f100, %f37;
	mul.f32 	%f101, %f36, %f94;
	mul.f32 	%f103, %f36, %f95;
	fma.rn.f32 	%f104, %f135, %f141, %f101;
	fma.rn.f32 	%f105, %f135, %f90, %f103;
	fma.rn.f32 	%f141, %f37, %f96, %f104;
	fma.rn.f32 	%f142, %f37, %f97, %f105;

$L__BB0_12:
	// Blend the three f32 at bytes 12..23 of each record (addressed relative
	// to the +24 pointers as -12 / -8 / -4) with the same weights.
	ld.global.f32 	%f110, [%rd5+-12];
	ld.global.f32 	%f111, [%rd5+-8];
	ld.global.f32 	%f112, [%rd5+-4];
	ld.global.f32 	%f113, [%rd6+-12];
	mul.f32 	%f114, %f36, %f113;
	ld.global.f32 	%f115, [%rd6+-8];
	mul.f32 	%f116, %f36, %f115;
	ld.global.f32 	%f117, [%rd6+-4];
	mul.f32 	%f118, %f36, %f117;
	fma.rn.f32 	%f119, %f135, %f110, %f114;
	fma.rn.f32 	%f120, %f135, %f111, %f116;
	fma.rn.f32 	%f121, %f135, %f112, %f118;
	ld.global.f32 	%f122, [%rd7+-12];
	ld.global.f32 	%f123, [%rd7+-8];
	ld.global.f32 	%f124, [%rd7+-4];
	fma.rn.f32 	%f125, %f37, %f122, %f119;
	fma.rn.f32 	%f126, %f37, %f123, %f120;
	fma.rn.f32 	%f127, %f37, %f124, %f121;
	// Scale (x, y, z) = (%f125, %f126, %f127) by 1/sqrt(x*x + y*y + z*z).
	// NOTE(review): nothing guards against a zero-length vector before rcp.
	mul.f32 	%f128, %f126, %f126;
	fma.rn.f32 	%f129, %f125, %f125, %f128;
	fma.rn.f32 	%f130, %f127, %f127, %f129;
	sqrt.rn.f32 	%f131, %f130;
	rcp.rn.f32 	%f132, %f131;
	mul.f32 	%f140, %f127, %f132;
	mul.f32 	%f139, %f126, %f132;
	mul.f32 	%f138, %f125, %f132;

$L__BB0_13:
	// Write the 32-byte return aggregate. %rs17..%rs24 are never assigned in
	// this function, so bytes 12..15 and 28..31 hold unspecified values.
	// NOTE(review): these look like struct padding -- confirm.
	st.param.f32 	[func_retval0+0], %f138;
	st.param.f32 	[func_retval0+4], %f139;
	st.param.f32 	[func_retval0+8], %f140;
	st.param.v4.b8 	[func_retval0+12], {%rs17, %rs18, %rs19, %rs20};
	st.param.f32 	[func_retval0+16], %f141;
	st.param.f32 	[func_retval0+20], %f142;
	st.param.f32 	[func_retval0+24], %f134;
	st.param.v4.b8 	[func_retval0+28], {%rs21, %rs22, %rs23, %rs24};
	ret;

}
	// .globl	oxMain
.visible .entry oxMain()
{
	// Scratch register that receives the address of the callable.
	.reg .b64 	%callable_addr;

	// Take the address of __direct_callable__oxMain. No later instruction in
	// this kernel reads the value; the compiler-marked inline-asm region that
	// followed this move in the generated code was empty.
	mov.u64 	%callable_addr, __direct_callable__oxMain;
	ret;

}

